* Start from a clean session. The capture prefix keeps the run going when
* no log is open from an earlier run.
capture log close
set more off
clear all
macro drop _all

* Relative paths used later in this file (data, log, xlsx, dta folders)
* resolve from this working directory.
cd "C:\Users\alice\OneDrive\Alice QUANTONOMICS\Input Substitution\Stata\1. NZ Data"
log using log/NZasset1, replace text

/*--------------------------------------------------------------------------
Project: Quantonomics DNSP Opex Cost Function Development Program
Author:  Michael Cunningham
File:    NZasset1.do
Initial Date:    22-Oct-2022
Revised Date:	 01-May-2025

Purpose: Read NZ DNSP data for RAB, and selected components of AUC for 
	the period 2005 to 2012 from the Commerce Commission workbook:
	"Electricity-distributors-information-disclosure-data-20082012.xls".
	Save the extracted data in the Stata dataset: dta/nzasset1.dta.

--------------------------------------------------------------------------*/

* Scratch files: nzdatA receives the Part 1 result and nzdatB the Part 2
* result; both are read back in the final merge.
tempfile nzdatA nzdatB 

*--------------------------------------------------------------------------
* Part 1: Read data from sheet "SUM02"
*--------------------------------------------------------------------------

* Every cell is read as text (allstring) so header rows and value rows can
* share a column. Variables take the Excel column letters B to IA.
import excel data/Electricity-distributors-information-disclosure-data-20082012.xls, ///
	sheet("SUM02") cellrange(B5:IA74) allstring clear

* Column C carries the row descriptions. The descriptions that are needed are
* swapped for short tags, which later become variable-name stems.

* System fixed assets (sfa)
replace C = "rab_sfa"   if C == "Regulatory Value at End of Previous Year*"
replace C = "rabC_sfa"  if C == "Regulatory Value of System Fixed Assets at Year End"
replace C = "dep_sfa"   if C == "Regulatory Depreciation (incl. value of assets decommissioned)"
replace C = "inf_sfa"   if C == "Indexed Revaluation"

* Non-system fixed assets (nsfa)
replace C = "rab_nsfa"  if C == "Regulatory value at end of previous year"
replace C = "rabC_nsfa" if C == "Regulatory Value of Non-System Fixed Assets at Year end"
replace C = "dep_nsfa"  if C == "Depreciation (incl. value of assets decommissioned)"
replace C = "inf_nsfa"  if C == "Revaluations"

* Total
replace C = "Total_RAB" if C == "Total Regulatory Asset Base Value (excluding FDC)"

* The first two imported rows are the header rows: row 1 is tagged as the
* distributor name and row 2 as the year.
replace C = "DNSPname" in 1
replace C = "Year" in 2

* Keep the two header rows and the tagged rows only, then discard column B.
keep if inlist(C, "DNSPname", "Year", "Total_RAB")                 ///
	| inlist(C, "rab_sfa", "rabC_sfa", "dep_sfa", "inf_sfa")      ///
	| inlist(C, "rab_nsfa", "rabC_nsfa", "dep_nsfa", "inf_nsfa")
drop B

* Fill missing values in row 1
* Columns D to IA are laid out in consecutive blocks of 8 columns, one block
* per distributor. In row 1 only the first column of each block is relied on
* for the distributor name (presumably the other cells come from a merged
* header and arrive empty - confirm against the workbook). The name in the
* first column of each block is copied over the rest of that block.
* The walk is positional, so it relies on D to IA being contiguous and in
* worksheet order, which is how import excel delivers them.
local blocksize 8
unab datacols : D-IA
local ncols : word count `datacols'

* Stop early if the sheet layout is not a whole number of blocks.
assert mod(`ncols', `blocksize') == 0

local pos 0
foreach col of local datacols {
	if mod(`pos', `blocksize') == 0 {
		local leader `col'
	}
	else {
		replace `col' = `leader' in 1
	}
	local ++pos
}

* Build a name for every column from its two header rows, in the form
* z_ + row 2 value (year) + _ + row 1 value (distributor), store it in row 1,
* and rename the column to the strtoname() version of that text.
foreach col of varlist _all {
	quietly replace `col' = "z_" + `col'[2] + "_" + `col'[1] in 1
	local newname = strtoname(`col'[1])
	rename `col' `newname'
}

* The header rows have served their purpose.
drop in 1/2

* Former column C (the row tags) becomes the identifier f_. The remaining
* columns are converted from text to numbers where their content allows it.
rename z_Year_DNSPname f_
destring z_*, replace

export excel xlsx/namescheck.xlsx, firstrow(var) sheet("original", replace)
describe, fullnames

* Transpose data
* Step 1: stack the year-distributor columns, giving one row per tag and
* year-distributor combination.
reshape long z_, i(f_) j(DNSPyear) string
describe, fullnames

* Step 2: spread the tags into columns, giving one row per year-distributor
* combination with variables z_rab_sfa, z_dep_sfa and so on.
reshape wide z_, i(DNSPyear) j(f_) string
describe, fullnames
export excel xlsx/namescheck.xlsx, firstrow(var) sheet("transposed", replace)

* Split DNSPyear into eiid and year
* DNSPyear is the reshape suffix: characters 1 to 4 are read as the year and
* everything from character 6 onward as the distributor name.
gen year = substr(DNSPyear,1,4)
gen dnspname = substr(DNSPyear,6,.)
destring year, replace

* Assign identifiers 2001 to 2019 in the order listed below. Some names look
* cut short (for example Horizon_Energy_Distributi), presumably by the
* 32-character limit on variable names - they must match the names produced
* by the rename step exactly.
local dnsplist                                                        ///
	Vector_Lines_Limited      Powerco_Limited        Unison_Networks  ///
	WEL_Networks              Aurora_Energy          Northpower_Limited ///
	Electra_Limited           Network_Tasman_Limited Counties_Power   ///
	Mainpower_New_Zealand     The_Power_Company      Top_Energy_Limited ///
	Alpine_Energy_Limited     Eastland_Network       Horizon_Energy_Distributi ///
	Marlborough_Lines_Limited The_Lines_Company      Waipa_Networks_Limited ///
	Wellington_Electricity_Li

gen int eiid = 0
local id = 2000
foreach nm of local dnsplist {
	local ++id
	replace eiid = `id' if dnspname == "`nm'"
}

* Rows whose name is not in the list keep eiid 0 and are removed.
drop if eiid == 0

label define dnsps                  ///
	2001 "Vector Lines"             ///
	2002 "Powerco"                  ///
	2003 "Unison Networks"          ///
	2004 "WEL Networks"             ///
	2005 "Aurora Energy"            ///
	2006 "Northpower"               ///
	2007 "Electra"                  ///
	2008 "Network Tasman"           ///
	2009 "Counties Power"           ///
	2010 "MainPower NZ"             ///
	2011 "The Power Company"        ///
	2012 "Top Energy"               ///
	2013 "Alpine Energy"            ///
	2014 "Eastland Network"         ///
	2015 "Horizon Energy"           ///
	2016 "Marlborough Lines"        ///
	2017 "The Lines Company"        ///
	2018 "Waipa Networks"           ///
	2019 "Wellington Electricity"
label values eiid dnsps
drop dnspname DNSPyear

* Combine system and non-system assets etc
* The sums are stored as double. A plain gen would store them as float, which
* holds only about 7 significant digits and would round the summed values.
* A missing component makes the sum missing.
gen double orab = z_rab_nsfa + z_rab_sfa
gen double crab = z_rabC_nsfa + z_rabC_sfa
gen double depr = z_dep_nsfa + z_dep_sfa
gen double infl = z_inf_nsfa + z_inf_sfa

* All remaining z_ inputs, including z_Total_RAB, are discarded here.
drop z_*
order eiid year
sort eiid year

* Blank out early years for eiid 2019 (labelled Wellington Electricity):
* orab up to and including 2009, the other three up to and including 2008.
* NOTE(review): presumably these years precede usable reporting for this
* distributor - confirm the reason and the differing cut-off for orab.
replace orab = . if eiid == 2019 & year <= 2009
foreach v of varlist crab depr infl {
	replace `v' = . if eiid == 2019 & year <= 2008
}

*Creating the Straight Line Depreciation
* The revaluation amount is added to depreciation. This must run after the
* blanking above so that blanked rows stay missing.
replace depr = depr + infl

describe
* tabmiss is a user-written command and must be installed for this to run.
tabmiss
export excel xlsx/namescheck.xlsx, firstrow(var) sheet("final", replace)

save `nzdatA'

*--------------------------------------------------------------------------
* Part 2: Read data from sheet "SUM01"
*--------------------------------------------------------------------------

* Every cell is read as text. Variables take the Excel column letters C to ER.
import excel data/Electricity-distributors-information-disclosure-data-20082012.xls, ///
	sheet("SUM01") cellrange(C5:ER104) allstring clear

* Column C carries the row descriptions. Tag the two header rows and the two
* tax rows that are needed.
replace C = "DNSPname" in 1
replace C = "Year" in 2
replace C = "tax"      if C == "Regulatory Tax Allowance"
replace C = "Stat_Tax" if C == "Statutory Tax Rate"

keep if inlist(C, "tax", "DNSPname", "Year", "Stat_Tax")

* Show in the log which rows survived and what was imported.
list C
describe, detail

* Fill missing values in row 1
* Columns D to ER are laid out in consecutive blocks of 5 columns, one block
* per distributor. In row 1 only the first column of each block is relied on
* for the distributor name (presumably the other cells come from a merged
* header and arrive empty - confirm against the workbook). The name in the
* first column of each block is copied over the rest of that block.
* The walk is positional, so it relies on D to ER being contiguous and in
* worksheet order, which is how import excel delivers them.
local blocksize 5
unab datacols : D-ER
local ncols : word count `datacols'

* Stop early if the sheet layout is not a whole number of blocks.
assert mod(`ncols', `blocksize') == 0

local pos 0
foreach col of local datacols {
	if mod(`pos', `blocksize') == 0 {
		local leader `col'
	}
	else {
		replace `col' = `leader' in 1
	}
	local ++pos
}

* Build a name for every column from its two header rows, in the form
* z_ + row 2 value (year) + _ + row 1 value (distributor), store it in row 1,
* and rename the column to the strtoname() version of that text.
foreach col of varlist _all {
	quietly replace `col' = "z_" + `col'[2] + "_" + `col'[1] in 1
	local newname = strtoname(`col'[1])
	rename `col' `newname'
}

* The header rows have served their purpose.
drop in 1/2

* Former column C (the row tags) becomes the identifier f_. The remaining
* columns are converted from text to numbers where their content allows it.
rename z_Year_DNSPname f_
destring z_*, replace

export excel xlsx/namescheck2.xlsx, firstrow(var) sheet("original", replace)
describe, fullnames

* Transpose, step 1: stack the year-distributor columns. In this part the
* check sheet named transposed is written from the long layout.
reshape long z_, i(f_) j(DNSPyear) string
describe, fullnames
export excel xlsx/namescheck2.xlsx, firstrow(var) sheet("transposed", replace)

* Transpose, step 2: spread the tags into the columns z_tax and z_Stat_Tax.
reshape wide z_, i(DNSPyear) j(f_) string
describe, fullnames

* Only the tax allowance is renamed. z_Stat_Tax keeps its prefix.
rename z_tax tax


* DNSPyear is the reshape suffix: characters 1 to 4 are read as the year and
* everything from character 6 onward as the distributor name.
gen year = substr(DNSPyear,1,4)
gen dnspname = substr(DNSPyear,6,.)
destring year, replace

* Assign identifiers 2001 to 2019 in the order listed below. The list and the
* numbering must stay the same as in Part 1, because eiid is a merge key.
local dnsplist                                                        ///
	Vector_Lines_Limited      Powerco_Limited        Unison_Networks  ///
	WEL_Networks              Aurora_Energy          Northpower_Limited ///
	Electra_Limited           Network_Tasman_Limited Counties_Power   ///
	Mainpower_New_Zealand     The_Power_Company      Top_Energy_Limited ///
	Alpine_Energy_Limited     Eastland_Network       Horizon_Energy_Distributi ///
	Marlborough_Lines_Limited The_Lines_Company      Waipa_Networks_Limited ///
	Wellington_Electricity_Li

gen int eiid = 0
local id = 2000
foreach nm of local dnsplist {
	local ++id
	replace eiid = `id' if dnspname == "`nm'"
}

* Rows whose name is not in the list keep eiid 0 and are removed.
drop if eiid == 0

label define dnsps                  ///
	2001 "Vector Lines"             ///
	2002 "Powerco"                  ///
	2003 "Unison Networks"          ///
	2004 "WEL Networks"             ///
	2005 "Aurora Energy"            ///
	2006 "Northpower"               ///
	2007 "Electra"                  ///
	2008 "Network Tasman"           ///
	2009 "Counties Power"           ///
	2010 "MainPower NZ"             ///
	2011 "The Power Company"        ///
	2012 "Top Energy"               ///
	2013 "Alpine Energy"            ///
	2014 "Eastland Network"         ///
	2015 "Horizon Energy"           ///
	2016 "Marlborough Lines"        ///
	2017 "The Lines Company"        ///
	2018 "Waipa Networks"           ///
	2019 "Wellington Electricity"
label values eiid dnsps

* The helper text columns are no longer needed.
drop DNSPyear dnspname
order eiid year tax
sort eiid year

* Blank out the 2008 tax value for eiid 2019 (labelled Wellington Electricity).
* NOTE(review): reason not stated in this file - confirm.
replace tax = . if eiid == 2019 & year == 2008

describe
export excel xlsx/namescheck2.xlsx, firstrow(var) sheet("final", replace)

save `nzdatB'



*--------------------------------------------------------------------------
* Part 3: Combine the data & save
*--------------------------------------------------------------------------

* Start from the asset panel saved in Part 1 and attach the tax panel saved
* in Part 2, matching one row to one row on distributor and year.
use `nzdatA', clear
merge 1:1 eiid year using `nzdatB'

* Record the match pattern in the log, by distributor and by year, before the
* merge indicator is dropped.
foreach key of varlist eiid year {
	tabulate `key' _merge
}
drop _merge

* Write the combined panel as a Stata dataset and as an Excel copy.
save dta/nzasset1, replace
export excel using xlsx/nzasset1.xlsx, firstrow(var) replace

describe
* tabmiss is a user-written command and must be installed for this to run.
tabmiss

log close
exit


